Top Publishers
# Top publishers: stacked bar chart of sales by genre for the five publishers
# with the highest total sales.

# Reshape the four regional sales columns into long form (one row per
# game/region), attach a readable region label, and compute the total sales of
# every Publisher/Genre pair for the bar tooltips.
# Note: data_set2 is left grouped by Publisher and Genre.
data_set2 <- data_set %>%
  gather(
    "Region", "Sales",
    c("NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales")
  ) %>%
  mutate(Region_updated = case_when(
    Region == "NA_Sales" ~ "North America",
    Region == "EU_Sales" ~ "Europe",
    Region == "JP_Sales" ~ "Japan",
    Region == "Other_Sales" ~ "Other",
    TRUE ~ Region
  )) %>%
  group_by(Publisher, Genre) %>%
  mutate(pub_sales = sum(Sales)) %>%
  mutate(tooltip = paste0('Genre: ', Genre, '\n', 'Total Sales: ', pub_sales))

# Names of the five publishers with the largest summed sales.
top_pub <- data_set2 %>%
  group_by(Publisher) %>%
  mutate(top_sales = sum(Sales)) %>%
  select(Publisher, top_sales) %>%
  unique() %>%
  arrange(desc(top_sales)) %>%
  head(n = 5) %>%
  select(Publisher)

# Keep only the rows that belong to those publishers.
bar_data_set <- data_set2 %>%
  filter(Publisher %in% top_pub$Publisher)

# Genre is mapped to `fill`, so the Brewer palette has to be applied through
# the fill scale; a colour scale would leave the bars on the default palette.
bar_plot <- ggplot(
  bar_data_set,
  aes(fill = Genre, x = Sales / 1000000, y = Publisher, tooltip = tooltip)
) +
  geom_bar_interactive(position = "stack", stat = "identity") +
  labs(
    x = 'Sales in Millions',
    y = 'Publishers',
    title = 'Top Video Game Genres by Publishers'
  ) +
  scale_fill_brewer(palette = "Set3")

ggiraph(ggobj = bar_plot)
## Warning: Using the `size` aesthetic in this geom was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` in the `default_aes` field and elsewhere instead.
Sales Region Tree map
# Treemap input: drop the Platform and Rank columns, de-duplicate, reshape the
# regional sales columns into long form, and give each region a readable label.
# The result stays grouped by Name and Region.
tree_data_set <- data_set %>%
  select(-c("Platform", "Rank")) %>%
  unique() %>%
  gather(
    key = "Region", value = "Sales",
    c("NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales")
  ) %>%
  group_by(Name, Region) %>%
  select(Name, Genre, Region, Sales) %>%
  unique() %>%
  mutate(
    Region_updated = case_when(
      Region == "NA_Sales" ~ "North America",
      Region == "EU_Sales" ~ "Europe",
      Region == "JP_Sales" ~ "Japan",
      Region == "Other_Sales" ~ "Other",
      TRUE ~ Region
    )
  )

# Region is the outer level of the hierarchy and Genre the inner one; tile area
# is proportional to Sales. The static treemap is wrapped as an interactive one.
d3tree2(
  treemap(
    tree_data_set,
    index = c("Region_updated", "Genre"),
    vSize = "Sales",
    title = "Sales by Region and Genre"
  ),
  rootname = "Sales by Region and Genre"
)

Top 100 Video Games
# Sum sales for every Name/Platform pair, build the hover text for each point,
# and keep one row per distinct combination of the plotted columns.
point_df <- df1 %>%
  group_by(Name, Platform) %>%
  mutate(all_sales = sum(Sales)) %>%
  ungroup() %>%
  mutate(
    tooltip = paste0(
      'Title: ', stringr::str_to_title(Name),
      '\nRank: ', rank,
      '\nSales: ', all_sales,
      '\nPlatform: ', Platform
    )
  ) %>%
  select(rank, Platform, Name, all_sales, tooltip) %>%
  unique()

# Interactive scatter plot of rank against sales, coloured by platform.
# The x axis is reversed so that the numerically smallest ranks sit on the
# right-hand side.
viz1 <- ggplot(
  point_df,
  mapping = aes(
    x = rank,
    y = all_sales / 1000000,
    color = Platform,
    tooltip = tooltip
  )
) +
  geom_point_interactive(size = 6, stat = "identity") +
  scale_x_reverse() +
  scale_color_brewer(palette = 'Set3') +
  labs(
    x = 'Rank',
    y = 'Sales in Millions',
    title = 'Sales of Top 100 Video Games'
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = .5, size = 18),
    text = element_text(size = 12)
  )

ggiraph(ggobj = viz1)
# Attach the summed sales of each Name/Region/Genre combination to every row.
# NOTE(review): mutate() keeps one row per input row, so if df3 holds several
# rows for the same Name/Region/Genre the same total is counted more than once
# in the histogram -- confirm that this is intended.
histo_df <- df3 %>%
  group_by(Name, Region, Genre) %>%
  mutate(all_sales = sum(Sales)) %>%
  ungroup()

# Histogram of those totals, expressed in millions, using bins one unit wide.
viz3 <- ggplot(histo_df, aes(x = all_sales / 1000000)) +
  geom_histogram(binwidth = 1, fill = "#A3E4D7", color = "#A3E4D7") +
  labs(
    x = 'Total Sales(Millions)',
    y = 'Count',
    title = 'Histogram of Sales of Top 100 Games'
  ) +
  theme(
    plot.title = element_text(hjust = .5, size = 18),
    text = element_text(size = 12)
  )

ggiraph(ggobj = viz3)
Twitch Streams to Sales
# Drop the Region column, build the hover text for every bubble, and remove
# duplicated rows.
bubble_df <- df2 %>%
  select(-"Region") %>%
  mutate(
    tooltip = paste0(
      'Title: ', stringr::str_to_title(Name),
      '\nGenre: ', Genre,
      '\nHours Watched On Twitch (Millions): ', total_hours_watched / 1000000,
      '\nSales (Millions): ', all_sales / 1000000,
      '\nAverage Peak Channels: ', avg_peak_channels
    )
  ) %>%
  unique()

# Bubble chart: hours watched on the x axis, sales on the y axis, bubble size
# from the average peak channel count, colour from the genre.
viz2 <- ggplot(
  bubble_df,
  mapping = aes(
    x = total_hours_watched / 1000000,
    y = all_sales / 1000000,
    size = avg_peak_channels,
    color = Genre,
    tooltip = tooltip
  )
) +
  geom_point_interactive(alpha = .7) +
  scale_size(range = c(1.4, 20), name = "Average Peak Channels") +
  scale_color_brewer(palette = 'Set3') +
  labs(
    x = 'Hours Watched On Twitch in Millions',
    y = 'Sales in Millions',
    title = 'Hours Watched on Twitch to Sales'
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = .5, size = 18),
    text = element_text(size = 12)
  )

ggiraph(ggobj = viz2)
User Rating Regression
# Scatter plot of user rating against sales (in millions) with a smoothed trend
# line; each point carries its own tooltip text from `user_tooltip`.
uscore_data_set <- regression_data_set

# NOTE(review): no colour aesthetic is mapped in this plot, so the Brewer
# colour scale below looks like it has no visible effect -- confirm before
# removing it.
uscore <- ggplot(
  uscore_data_set,
  mapping = aes(x = user_review, y = Sales / 1000000)
) +
  geom_point_interactive(aes(tooltip = user_tooltip)) +
  geom_smooth() +
  scale_color_brewer(palette = "Set3") +
  scale_y_continuous(labels = scales::comma) +
  labs(
    x = "User Rating (1-100)",
    y = "Sales in Millions",
    title = "User Rating Regression on Global Sales"
  ) +
  theme(
    plot.title = element_text(hjust = .5, size = 18),
    text = element_text(size = 12)
  )

ggiraph(ggobj = uscore)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# Histogram of the user review scores, using bins one unit wide.
uscore_histo <- regression_data_set

viz5 <- ggplot(uscore_histo, aes(x = user_review)) +
  geom_histogram(binwidth = 1, fill = "#A3E4D7", color = "#A3E4D7") +
  labs(
    x = 'User Score',
    y = 'Count',
    title = 'Histogram of User Score'
  ) +
  theme(
    plot.title = element_text(hjust = .5, size = 18),
    text = element_text(size = 12)
  )

ggiraph(ggobj = viz5)